Final Project

Xinyu (Lily) Wang, Sophie Hartford, Everett Mahaffy

Load packages

library(tidyverse)
library(flextable)
library(here)
library(readxl)
library(modelsummary)
library(janitor)
library(performance)
library(ggplot2)

Load data

# Demographic
age <- read_csv(here("data", "TAG_age_session_dates_4waves.csv"))
age <- clean_names(age)

race <- read_excel(here("data", "TAG_W1_Race_Ethnicity.xlsx"))
race <- clean_names(race)

# Emotion Regulation(ERQ)
erq <- read_csv(here("data", "ERQ_Wave1.csv"))
erq <- erq %>% 
       clean_names() %>% 
       distinct(tagid, .keep_all = TRUE)

# Adolescent Depression(CESDC)
cesdc <- read_csv(here("data", "CESDC_Wave1.csv"))
cesdc <- clean_names(cesdc)

Study Info

We will be utilizing data from the longitudinal study “Transitions in Adolescent Girls,” (Barendse et al., 2020) specifically focusing on data from baseline (Wave 1). This dataset includes 174 participants who were 10-13 at the baseline. Our variables of interest are emotion regulation strategy use and depressive symptoms. These were measured using the Emotion Regulation Questionnaire (ERQ; John & Gross, 2003) and the Center for Epidemiological Studies Depression Scale for Children (CES- DC, Weissman et al., 1980). These are continuous scales which provide total scores for reappraisal and suppression (ERQ) and depressive symptoms (CES-DC).

Participant Demographics

Race/ethnicity category

Clean the data frame: identify subjects reported multiple racial categories and rename them to “Multi-racial”

race_clean <- race %>%
  mutate(
    w1_ethnicity = as.character(w1_ethnicity),
    w1_ethnicity = if_else(
      str_detect(w1_ethnicity, ","),
      "g. Multi-racial",
      w1_ethnicity
    )
  )

Participant demographics cont.

Age

We have subject age data at both sessions and we are interested in calculating the mean age of the two sessions of wave1

age <- age %>% 
  mutate(W1_mean_age = (w1s1_age + w1s2_age)/2) 

# Check mean age for sample
age_summary <- age %>%
  summarize(
    mean_W1_age = mean(W1_mean_age, na.rm = TRUE),
    sd_W1_age   = sd(W1_mean_age, na.rm = TRUE),
  )
print(age_summary)
# A tibble: 1 × 2
  mean_W1_age sd_W1_age
        <dbl>     <dbl>
1        11.6     0.808

Join all data frames

df <- erq %>% 
  left_join(cesdc, by= 'tagid')

df <- df %>% 
     left_join(age, by = 'tagid')

df <- df %>% 
     left_join(race_clean, by = 'tagid') %>% 
     select(tagid, erq_reappraisal_total, erq_suppression_total, ces_dc_total_75perc, W1_mean_age, w1_ethnicity, 
            w1s1_date, w1s1_age, w1s2_date, w1s2_age, w2s1_date, w2s1_age, w2s2_date, w2s2_age, 
            w3s1_date, w3s1_age, w3s2_date, w3s2_age, w4s1_date, w4s1_age, w4s2_date, w4s2_age
            )

df <- df %>%
  rename(
    wave1_session1_date = w1s1_date,
    wave1_session1_age  = w1s1_age,
    wave1_session2_date = w1s2_date,
    wave1_session2_age  = w1s2_age,

    wave2_session1_date = w2s1_date,
    wave2_session1_age  = w2s1_age,
    wave2_session2_date = w2s2_date,
    wave2_session2_age  = w2s2_age,

    wave3_session1_date = w3s1_date,
    wave3_session1_age  = w3s1_age,
    wave3_session2_date = w3s2_date,
    wave3_session2_age  = w3s2_age,

    wave4_session1_date = w4s1_date,
    wave4_session1_age  = w4s1_age,
    wave4_session2_date = w4s2_date,
    wave4_session2_age  = w4s2_age
  )

Tidy and filter

# Tidy
df2 <- df %>%
  mutate(across(starts_with("wave"), as.character))

df_long <- df2 %>%
  pivot_longer(
    cols = starts_with("wave"),
    names_to = c("wave", "session", "info"),
    names_sep = "_",
    values_to = "value"
  ) %>%
  mutate(
    wave    = parse_number(wave),
    session = parse_number(session)
  )

df_tidy <- df_long %>%
  pivot_wider(
    names_from = info,
    values_from = value
  )

# Filter
df_final <- df_tidy %>% 
  filter(wave == 1, session == 1)

Cohort Plot

ggplot(df_tidy, aes(x = age, y = tagid)) +
  geom_line(aes(group = tagid), size = .5, alpha = .4) +
  geom_point(aes(color = as.factor(wave)), size = 2) +
  labs(
    x = "Age",
    y = "TAG Participant",
    title = "Age by Wave",
    color = "Wave"
  ) +
  theme_classic() +
  theme(
    legend.position = "top",
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  )

Sample Summary Statistics

df_names <- df_tidy %>% 
    select(-wave, -session) %>% 
    rename(
    Reappraisal       = erq_reappraisal_total,
    Suppression       = erq_suppression_total,
    Depression        = ces_dc_total_75perc,
    Age               = W1_mean_age,
    Ethnicity         = w1_ethnicity
    )

Summary Stats Table

datasummary_skim(df_names,
                 title = "Table 1. Sample Descriptive Statistics")
Unique Missing Pct. Mean SD Min Median Max Histogram
Reappraisal 31 2 27.4 6.6 6.0 28.0 41.0
Suppression 24 2 13.7 5.2 5.0 13.0 28.0
Depression 49 5 12.8 10.3 0.0 9.5 50.0
Age 141 1 11.6 0.8 10.0 11.7 13.1
Ethnicity N %
a. Black/ African American 8 0.7
b. Hispanic/ Latino/ Chicano 48 4.2
c. Native American or Native Alaskan 8 0.7
d. White / Caucasian 720 63.4
e. Asian 16 1.4
g. Multi-racial 248 21.8
h. Other 16 1.4
j. Decline to respond 8 0.7
NA 64 5.6

RQ 1

Is higher emotion reappraisal associated with lower depression symptoms in adolescent girls?

rq1 <- lm(df_tidy$ces_dc_total_75perc ~ df_tidy$erq_reappraisal_total)
check_model(rq1)

RQ 2

Is higher emotion suppression associated with higher depression symptoms in adolescent girls?

rq2 <- lm(df_tidy$ces_dc_total_75perc ~ df_tidy$erq_suppression_total)
check_model(rq2)

Exploratory

Is higher suppression use associated with higher depression symptoms when adjusting for reappraisal use?

ex1 <- lm(df_tidy$ces_dc_total_75perc ~ df_tidy$erq_suppression_total + df_tidy$erq_reappraisal_total)
check_model(ex1)

Does age moderate the relation between reappraisal/suppression and depression symptoms in adolescent girls?

ex2 <- lm(df_tidy$ces_dc_total_75perc ~ df_tidy$erq_reappraisal_total * df_tidy$W1_mean_age)
check_model(ex2)

ex3 <- lm(df_tidy$ces_dc_total_75perc ~ df_tidy$erq_suppression_total * df_tidy$W1_mean_age)
check_model(ex3)

Results

Primary RQ Tables

modelsummary(
  list(
    "Reappraisal" = rq1,
    "Suppression" = rq2),
  coef_map = c(
    "(Intercept)"                     = "Intercept",
    "df_tidy$erq_reappraisal_total"   = "Reappraisal (ERQ)",
    "df_tidy$erq_suppression_total"   = "Suppression (ERQ)"
  ),
  stars = TRUE,
  title = "Table 2. Model Summary for RQ1 and RQ2"
  )
Table 2. Model Summary for RQ1 and RQ2
Reappraisal Suppression
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
Intercept 22.805*** 3.864***
(1.306) (0.800)
Reappraisal (ERQ) -0.362***
(0.046)
Suppression (ERQ) 0.645***
(0.055)
Num.Obs. 1056 1056
R2 0.055 0.114
R2 Adj. 0.054 0.113
AIC 7890.8 7719.0
BIC 7905.7 7733.9
Log.Lik. -3942.414 -3856.517
F 60.906
RMSE 10.12 9.33

Exploratory Results

modelsummary(
  list(
    "Suppression + Reappraisal" = ex1,
    "Reappraisal × Age Interaction" = ex2,
    "Suppression × Age Interaction" = ex3),
    coef_map = c(
    "(Intercept)"                     = "Intercept",
    "df_tidy$erq_reappraisal_total"   = "Reappraisal (ERQ)",
    "df_tidy$erq_suppression_total"   = "Suppression (ERQ)",
    "df_tidy$erq_reappraisal_total:df_tidy$W1_mean_age" = "Reappraisal x Age",
    "df_tidy$erq_suppression_total:df_tidy$W1_mean_age" = "Suppression x Age"),
  stars = TRUE,
  title = "Table 3. Model Summaries for Multiple Regression and Interaction Models"
  )
Table 3. Model Summaries for Multiple Regression and Interaction Models
Suppression + Reappraisal Reappraisal × Age Interaction Suppression × Age Interaction
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
Intercept 13.733*** 40.986* 2.701
(1.484) (18.413) (11.043)
Reappraisal (ERQ) -0.332*** -1.688**
(0.043) (0.644)
Suppression (ERQ) 0.592*** 0.053
(0.055) (0.743)
Reappraisal x Age 0.116*
(0.056)
Suppression x Age 0.049
(0.064)
Num.Obs. 1032 1056 1056
R2 0.164 0.075 0.119
R2 Adj. 0.162 0.073 0.117
AIC 7504.3 7871.6 7716.9
BIC 7524.1 7896.4 7741.7
Log.Lik. -3748.160 -3930.786 -3853.466
F 28.523
RMSE 9.14 10.01 9.30

Plot 1

ggplot(df_tidy, aes(x=erq_reappraisal_total, y=ces_dc_total_75perc))+
  geom_point(color="Dodgerblue3")+
  geom_smooth(method = lm, color="Aquamarine3")+
  labs(x="Reappraisal", y="Depression")

Plot 2

ggplot(df_tidy,aes(x=erq_suppression_total, y=ces_dc_total_75perc))+
  geom_point(color="Dodgerblue3")+
  geom_smooth(method = lm, color="Aquamarine3")+
  labs(x="Suppression", y="Depression")